Scenario

Within this chapter developmental plasticity was explored within Acanthochromis polyacanthus that were collected from two different regions (i.e., low-latitude, Cairns, and high-latitude, Mackay). Fish were held in common garden experiments at 28.5 C. Clutch data was collected along with parental morphometeric data to determine how fish from each population performed at 28.5 C, a temperature that was shown to produce similar absoluate aerobic scope performances in a previous study [link]. Once hatched offspring were placed into three different temperature treatments, 28.5, 30, and 31.5 C. After approximately 5-6 months offspring length and weight was measured, as well as CTmax and respiration during CTmax trials.

Load packages

library(tidyverse) # data manipulation
library(ggpubr) # figure arrangement 
library(brms) # Bayesian models
library(StanHeaders)# needed to run Bayesian models
library(rstan) # needed to run Bayesian models
library(standist) # needs to be installed 
library(bayesplot) # needed for MCMC diagnostics 
library(DHARMa) # model validation 
library(ggdist) # partial plots 
library(tidybayes) # partial plots 
library(broom.mixed) # model investigation
library(emmeans) # pairwise comparisons
library(rstanarm) # pairwise comparisons - need for emmeans 
library(ggpubr) # arranging figures

Set working directory

knitr::opts_knit$set(root.dir=working.dir)

Import data

This data set has no point outliers however, in the next chunk 3 samples will be discarded due to poor data quailty

lat_resp_dat <- read_delim("C:/Users/jc527762/OneDrive - James Cook University/PhD dissertation/Data/Chapter4_Latitude/import_files/lat_resp_dat_no_outliers.txt", 
    delim = "\t", escape_double = FALSE, 
    col_types = cols(area = col_skip(), rate.a.spec = col_skip()), 
    trim_ws = TRUE)

Data manipulation

rows_of_data <- count(lat_resp_dat, sampleID)
lat_resp_dat2 <- lat_resp_dat |> 
  mutate(dev.temp = as.factor(dev.temp), 
         replicate = str_sub(sampleID, -1,-1), 
         population = factor(population)) |>
                                           # number of observations = 5758
  filter(sampleID != "56.CARL.137.28,5.1", # 5777 - 76 = 5682
         sampleID != "56.CARL.137.28,5.2", # 5701 - 64 = 5618
         sampleID != "60.LCKM.152.30.1"  # 5637 - 76 = 5542
  ) |> 
  filter(time_lag_sec >2001) |> # remove samples from first 5 cycles (i.e., first 33 minutes) 
  group_by(sampleID) |> 
  mutate(max_value_index = which.max(rate.output), 
         row_number = row_number(), 
         max.rate = max(rate.output), 
         low.rate = mean(rate.output[order(rate.output)[1:10]]), 
         net.rate = max.rate - low.rate) |> 
  filter(row_number <= max_value_index) |>
  ungroup() |> 
  mutate(cMASS = scale(mass, center=TRUE, scale=FALSE)[,1], 
         cAge = scale(age, center=TRUE, scale=FALSE)[,1], 
         region = factor(region), 
         dev.temp =factor(dev.temp), 
         LEVEL = as.factor(case_when(tank >= 1 & tank <= 199 ~ 1,
                           tank >= 200 & tank <= 299 ~ 2,
                           tank >= 300 & tank <= 399 ~ 3,
                           TRUE ~ NA_real_)), 
         female = factor(female))

Exploratory data analysis

eda1 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output)) + 
  geom_point(alpha=0.5, color = "black") + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE), color = "red") + 
  theme_classic() +
  ggtitle("All data points - 2nd order polynomial")

eda2 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output, color=region)) + 
  geom_point(alpha=0.5) + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE), color = "red") + 
  facet_wrap(~ region) + theme_classic() +
  theme(legend.position = "none") +
  ggtitle("Region - 2nd order polynomial")

eda3 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output, color=dev.temp)) + 
  geom_point(alpha=0.1) + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE)) + 
  facet_wrap(~ region) + theme_classic() +
  theme(legend.position = "none") +
  ggtitle("Region - 2nd order polynomial") 

eda4 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output, color=region)) + 
  geom_point(alpha=0.1) + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE)) + 
  facet_wrap(~ dev.temp) + theme_classic() +
  theme(legend.position = "none") +
  ggtitle("Region - 2nd order polynomial")

eda5 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output, color=population)) + 
  geom_point(alpha=0.5) + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE), color = "red") + 
  facet_wrap(~ population) + theme_classic() +
  theme(legend.position = "none") +
  ggtitle("Population - 2nd order polynomial")

eda6 <- ggplot(lat_resp_dat2, aes(x=Temperature, y=rate.output, color=dev.temp)) + 
  geom_point(alpha=0.1) + 
  geom_smooth(method="lm", 
              se=TRUE, 
              fill=NA,
              formula = y~poly(x, 2, raw=TRUE)) + 
  facet_wrap(~ population) + theme_classic() +
  theme(legend.position = "none") +
  ggtitle("Population - 2nd order polynomial")

eda.fig <- ggarrange(eda1,eda2,eda3,eda4,eda5,eda6,
          nrow = 6, 
          ncol=1); eda.fig

Model fit

Great lets start to fit our models. There are number of variables within our data frame some of which may or may not be important. To investigate which variables are important and which are not will test different hypotheses that use different combinations of variables that are suited to answer reasonable hypotheses.

From out exploratory data analysis we can see that due to the upwards infliction within the data as fish approach CTmax we will likely need to apply a second order polynomial function to out temperature covariate. Due to the large sample size we have we will also explore how will General Additive Models can be used to model our data.

We will start out by testing two different hypotheses.

  1. The first hypothesis that will be tested will be the base model that includes covariates that are essential to the question we are asking, are there regional differences in developmental plasticity within Acanthchormis polyacanthus juveniles? To answer this question we will include a three-way interaction including region * dev.temp * Temperature, as well as the covariate cMASS, that stands for centered mass values, because we know that mass influences oxygen uptake.
rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS
  1. The second hypothesis will be similar to the base model but will include covariates that are associated with the systems that fish were tested on. Covariates will include chamber and system. By exploring this model we can make sure that the position that chambers were placed in the tanks did not influence oxygen consumption and that the two seems used, labelled DELL and ASUS from the computers that were used to collect data, produced comparable data.
rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS + chamber + system

Base model

model1 <- brm(rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS, 
              family=gaussian(), 
              data=lat_resp_dat2, 
              warmup = 4000, 
              iter=10000, 
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE), 
              chains=4, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling

Model 1- sampling diagnositics

bayesplot

mcmc_plot(model1, type='combo')

mcmc_plot(model1, type='trace') 
## No divergences to plot.

mcmc_plot(model1, type='dens_overlay') 

mcmc_plot(model1, type='acf_bar')

mcmc_plot(model1, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model1, type='neff_hist') 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model1$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model1$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model1$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model1$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model1$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Equipment model

model.equip <- brm(rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS + system + chamber, 
                   family=gaussian(), 
              data=lat_resp_dat2, 
              warmup = 4000, 
              iter=10000, 
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE), 
              chains=4, 
              thin = 5, 
              control = list(adapt_delta=0.95))
## Compiling Stan program...
## Start sampling

Equipment model - sampling diagnositics

bayesplot

mcmc_plot(model.equip, type='combo')

mcmc_plot(model.equip, type='trace') 
## No divergences to plot.

mcmc_plot(model.equip, type='dens_overlay') 

mcmc_plot(model.equip, type='acf_bar')

mcmc_plot(model.equip, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model.equip, type='neff_hist') 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model.equip$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model.equip$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model.equip$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model.equip$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model.equip$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Model compairisons

WAIC

model1.waic <- waic(model1)
## Warning: 
## 2 (0.0%) p_waic estimates greater than 0.4. We recommend trying loo instead.
model.equip.waic <- waic(model.equip) 
## Warning: 
## 2 (0.0%) p_waic estimates greater than 0.4. We recommend trying loo instead.
loo_compare(model1.waic, model.equip.waic) 
##             elpd_diff se_diff
## model.equip   0.0       0.0  
## model1      -54.1      10.6

LOO

LOO(model1, model.equip) 
## Output of model 'model1':
## 
## Computed from 4800 by 4776 log-likelihood matrix.
## 
##          Estimate    SE
## elpd_loo    623.0  63.1
## p_loo        25.6   1.4
## looic     -1246.0 126.2
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model.equip':
## 
## Computed from 4800 by 4776 log-likelihood matrix.
## 
##          Estimate    SE
## elpd_loo    677.1  64.4
## p_loo        30.0   1.6
## looic     -1354.2 128.8
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##             elpd_diff se_diff
## model.equip   0.0       0.0  
## model1      -54.1      10.6

It seems like additional information of chamber and system do not produce a more informative model

Randomn effects

Before we validate the model we will include out random effects:

  1. LEVEL: What level the tank the clutch was held in, 1,2, or 3.
  2. FEMALE: To account for the factor that some clutches came from the same breeding pair
findpriors = get_prior(rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS + (1|LEVEL) + (1|female), 
              family=gaussian(), 
              data=lat_resp_dat2); findpriors
priors = c(set_prior("normal(0,100)", class="b", coef="cMASS"), 
           set_prior("normal(0,100)", class="b", coef="dev.temp30"),
           set_prior("normal(0,100)", class="b", coef="dev.temp30:polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="dev.temp30:polyTemperature22"),
           set_prior("normal(0,100)", class="b", coef="dev.temp31"),
           set_prior("normal(0,100)", class="b", coef="dev.temp31:polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="dev.temp31:polyTemperature22"),
           set_prior("normal(0,100)", class="b", coef="polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="polyTemperature22"),
           set_prior("normal(0,100)", class="b", coef="regionleading"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp30"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp30:polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp30:polyTemperature22"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp31"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp31:polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="regionleading:dev.temp31:polyTemperature22"),
           set_prior("normal(0,100)", class="b", coef="regionleading:polyTemperature21"),
           set_prior("normal(0,100)", class="b", coef="regionleading:polyTemperature22")
           )
model1.re <- brm(rate.output2 ~ region*dev.temp*poly(Temperature, 2) + cMASS + (1|LEVEL) + (1|female), 
              family=gaussian(), 
              prior = priors,
              data=lat_resp_dat2, 
              warmup = 1500, 
              iter=4000, 
              seed=123, 
              cores=4, 
              save_pars = save_pars(all=TRUE), 
              chains=3, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 31 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems

Model validation

We will continue with the first model. lets start validating the model to see how it performed. The initial diagnostics seemed to be pretty good. Now lets check to see if it is violating any assumptions.

Distribution

pp_check(model1.re, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

DHARMa residuals

preds <- posterior_predict(model1.re, ndraws=250, summary=FALSE)
model1.re.resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = na.omit(lat_resp_dat2$rate.output2), 
                              fittedPredictedResponse = apply(preds, 2, median), 
                              integerResponse = 'student')
plot(model1.re.resids) 

conditional_effects

model1.re |> 
  conditional_effects(spaghetti = TRUE, ndraws=200) 

Model investigation

summary

summary(model1.re)
## Warning: There were 31 divergent transitions after warmup. Increasing
## adapt_delta above 0.95 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: rate.output2 ~ region * dev.temp * poly(Temperature, 2) + cMASS + (1 | LEVEL) + (1 | female) 
##    Data: lat_resp_dat2 (Number of observations: 4776) 
##   Draws: 3 chains, each with iter = 4000; warmup = 1500; thin = 5;
##          total post-warmup draws = 1500
## 
## Group-Level Effects: 
## ~female (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.13      0.03     0.09     0.20 1.00     1261     1316
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.05      0.07     0.01     0.26 1.00     1194      906
## 
## Population-Level Effects: 
##                                            Estimate Est.Error l-95% CI u-95% CI
## Intercept                                      0.62      0.05     0.51     0.73
## regionleading                                  0.03      0.07    -0.12     0.17
## dev.temp30                                     0.03      0.01     0.01     0.05
## dev.temp31                                     0.06      0.01     0.04     0.08
## polyTemperature21                              8.04      0.47     7.12     8.96
## polyTemperature22                              5.03      0.49     4.08     6.00
## cMASS                                        416.85      6.01   405.31   429.39
## regionleading:dev.temp30                      -0.06      0.01    -0.08    -0.03
## regionleading:dev.temp31                       0.00      0.01    -0.02     0.03
## regionleading:polyTemperature21               -0.57      0.69    -1.96     0.79
## regionleading:polyTemperature22                0.77      0.70    -0.61     2.13
## dev.temp30:polyTemperature21                  -0.18      0.65    -1.49     1.08
## dev.temp31:polyTemperature21                  -1.07      0.63    -2.26     0.18
## dev.temp30:polyTemperature22                  -0.65      0.64    -1.90     0.57
## dev.temp31:polyTemperature22                  -1.10      0.65    -2.34     0.17
## regionleading:dev.temp30:polyTemperature21     0.45      1.00    -1.43     2.41
## regionleading:dev.temp31:polyTemperature21     0.15      0.93    -1.70     1.96
## regionleading:dev.temp30:polyTemperature22     1.19      0.99    -0.68     3.20
## regionleading:dev.temp31:polyTemperature22     0.50      0.93    -1.30     2.32
##                                            Rhat Bulk_ESS Tail_ESS
## Intercept                                  1.00     1254     1076
## regionleading                              1.00     1376     1417
## dev.temp30                                 1.00     1359     1502
## dev.temp31                                 1.00     1453     1420
## polyTemperature21                          1.00     1556     1426
## polyTemperature22                          1.00     1341     1383
## cMASS                                      1.00     1418     1240
## regionleading:dev.temp30                   1.00     1377     1520
## regionleading:dev.temp31                   1.00     1257     1422
## regionleading:polyTemperature21            1.00     1456     1457
## regionleading:polyTemperature22            1.00     1280     1419
## dev.temp30:polyTemperature21               1.00     1483     1268
## dev.temp31:polyTemperature21               1.00     1402     1407
## dev.temp30:polyTemperature22               1.00     1385     1224
## dev.temp31:polyTemperature22               1.00     1470     1499
## regionleading:dev.temp30:polyTemperature21 1.00     1427     1482
## regionleading:dev.temp31:polyTemperature21 1.00     1523     1478
## regionleading:dev.temp30:polyTemperature22 1.00     1468     1367
## regionleading:dev.temp31:polyTemperature22 1.00     1457     1342
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.19      0.00     0.19     0.19 1.00     1479     1443
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

R2

model1.re |> bayes_R2(summary = FALSE) |> median_hdci()

tidyMCMC

tidyMCMC(model1.re, estimate.method='median', conf.int=TRUE, conf.method='HPDinterval')

gather draws

#model1.re.wo |> get_variables()
model1.re |> gather_draws(`b_.*|sigma`, regex =TRUE) |> 
  median_hdci()

bayesplot

model1.re |> mcmc_plot(type='intervals')

emmeans - pariwise

model1.re |> emmeans(pairwise ~ region*dev.temp, type="response") |> pairs(by="dev.temp") |> summary()
## NOTE: Results may be misleading due to involvement in interactions

probabilities

prob <- model1.re  |> emmeans(pairwise ~ region*dev.temp)
## NOTE: Results may be misleading due to involvement in interactions
prob2 <- prob$contrasts |> gather_emmeans_draws()
prob2 %>% group_by(contrast) %>% dplyr::summarise(Prob = sum(.value>0)/n())